Source Data: US domestic flights from 1990 to 2009, US Census Bureau

Actions

Seattle is a good candidate for capacity increase

  • Seattle is 2nd most full west coast airport (behind SFO)
    • 8th most full in US
Airport Code City Percent Seats Filled
HNL Honolulu, HI 83.11
PBI West Palm Beach, FL 82.03
MCO Orlando, FL 81.64
MIA Miami, FL 81.42
SFO San Francisco, CA 81.10
FLL Fort Lauderdale, FL 80.57
TPA Tampa, FL 80.38
SEA Seattle, WA 80.25
LAS Las Vegas, NV 80.20
IAH Houston, TX 79.48

Specific Routes

  • Seattle had two of the top three most-filled routes
Origin Destination Percent Seats Filled
Nashville, TN Seattle, WA 95.03
Springfield, MO Las Vegas, NV 94.86
Seattle, WA Charlotte, NC 94.43
All Routes Median: 75.28

Significant savings potential

If Houston - Dallas had average rate of seats filled, it would have saved around 104,000 empty seats in 2009

Overall market data row

Overall passenger volume has decreased recently

Despite volume drop, operating efficiency is up

---
title: "2009 Domestic Flight Analysis"  
date: "2/27/2020"
output: 
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: fill
    source: embed
    theme: yeti
    
---

``` {r echo = FALSE}
# Course: 5210 Communicating Data
# Purpose: General Mills Cereal Sales Analysis
# Date: 2/13/20
# Author: Eric Hestekin, Schyuler Lujan
# Repo:  https://github.com/ehestekin/MSBA_5210_DataVis
```

``` {r echo = FALSE, include = FALSE}
# Clear environment of variables and functions
rm(list = ls(all = TRUE)) 
# Clear environmet of packages
if(is.null(sessionInfo()$otherPkgs) == FALSE)lapply(paste("package:", names(sessionInfo()$otherPkgs), sep=""), detach, character.only = TRUE, unload = TRUE)
```

``` {r setup, include = FALSE, warning = FALSE}
# Load Library
library(tidyverse)
library(patchwork)
library(scales)

#this forces kable html outputs (avoids format = 'html' in each call)
options(knitr.table.format = "html")

```

Source Data:  [US domestic flights from 1990 to 2009, US Census Bureau](http://academictorrents.com/details/a2ccf94bbb4af222bf8e69dad60a68a29f310d9a)

```{r, warning = FALSE}
#Load Data TSV
#no column headers so set array of names
col_labels = c('origin_code',
               'dest_code',
               'origin_city',
               'dest_city',
               'passengers',
               'seats',
               'num_flights',
               'distance',
               'year_month',
               'origin_pop',
               'dest_pop')

flights_df <- read_tsv('flight_edges.tsv',col_names = col_labels)
```

```{r, include = FALSE}
#Tidy


#separate month_year
#first 4 chars are year, convert from string to num
flights_df <- flights_df %>% 
  separate(year_month, into = c('year','month'), sep = 4, convert = TRUE) 

#add percentage of seats filled for each flight
#and remove rows where seats are zero (no flight data)
# and also remove flights where seat/flight < 4 (non commercial flight)
com_flights_df <- flights_df %>%
  mutate(seat_fill_perc = (passengers/seats) * 100) %>% 
  filter(seats != 0) %>% 
  filter(seats/num_flights > 4)

#also throw out seat filled percentages > 100
#this shouldn't be possible so don't trust data point
com_flights_df <- filter(com_flights_df, seat_fill_perc <= 100)

#get summary now with tidy commercial flights data set
summary(com_flights_df)
```



Actions 
----------

### Seattle is a good candidate for capacity increase {data-width=83}

+ Seattle is ***2nd most full*** west coast airport (behind SFO)
    + 8th most full in US

```{r, include = F}
com_flights_by_city <- com_flights_df %>% 
  group_by(origin_code, year) %>% 
  summarize(tot_passenger_departs = sum(passengers),
            origin_pop = mean(origin_pop), #still want origin pop data.  mean should just return the pop
            avg_seat_fill = sum(passengers)/sum(seats) * 100,
            pass_pop_ratio = tot_passenger_departs / origin_pop) %>% 
  left_join(distinct(select(com_flights_df, origin_code, origin_city)), by = 'origin_code')

#limit to cities with significant departures (>100 passengers daily)
com_flights_by_city <- com_flights_by_city %>% filter(tot_passenger_departs > 36500)

com_flights_by_city_2009 <- com_flights_by_city %>% filter(year == 2009)

summary(com_flights_by_city)
```

```{r}
com_flights_by_city_2009 %>% 
  arrange(desc(avg_seat_fill)) %>% 
  filter(tot_passenger_departs > 1e6,
         avg_seat_fill > 79.475) %>% 
  select(origin_code, origin_city, avg_seat_fill) %>%
  knitr::kable(col.names = c('Airport Code',
                             'City',
                             'Percent Seats Filled'),
               align = 'c',
               digits = 2) %>% 
  kableExtra::row_spec(8, bold = T, 
                       color = '#a3a3a3', background = '#344182')
```

```{r, include = F}
route_data <- com_flights_df %>% 
  group_by(origin_city, dest_city, year) %>% 
  summarise(total_flights = sum(num_flights),
            total_passengers = sum(passengers),
            total_seats = sum(seats),
            avg_seat_fill_perc = mean(total_passengers / total_seats) * 100)

#lets look at 5 most popular routes from last year and see how it has trended over the previous 10 years
#only look at routes with more than a flight every other day
route_data_2009 <- route_data %>% filter(year == 2009, total_flights > 365/2)
```

### Specific Routes {data-width=83}

+ Seattle had two of the top three most-filled routes

```{r}

route_data_2009 %>% 
  arrange(desc(avg_seat_fill_perc)) %>% 
  filter(avg_seat_fill_perc > 94.4) %>% 
  select(origin_city, dest_city, avg_seat_fill_perc) %>%
  ungroup() %>% 
  add_row(origin_city = 'All Routes',
                     dest_city = 'Median:',
                     avg_seat_fill_perc = 75.284) %>% 
  knitr::kable(col.names = c('Origin',
                             'Destination',
                             'Percent Seats Filled'),
               align = rep('c', 3),
               digits = 2) %>% 
  kableExtra::row_spec(c(1,3), bold = T, 
                       color = '#a3a3a3', background = '#344182') %>% 
  kableExtra::kable_styling(full_width = T) %>% 
  kableExtra::row_spec(4, bold = T)

# summary(route_data_2009)
```

### Most popular routes, one stands out as needing improvement {data-width=117}

```{r}
five_busiest_routes_data <- route_data_2009 %>% 
  arrange(desc(total_passengers)) %>% 
  filter(total_passengers %in% 
           #manually select rows for now had a hard time filtering properly
           c(1501883, 1494141, 1399554, 1380928, 1352360)) %>% 
  mutate(route = paste(origin_city, ' - ', dest_city))

five_busiest_plot <-  
  ggplot(five_busiest_routes_data, 
         mapping = aes(x = route, 
                       y = avg_seat_fill_perc/100)) +
  geom_bar(stat = 'identity', 
           fill = c('grey','#750000','grey','grey','grey')) +
  geom_text(aes(x = route, y = avg_seat_fill_perc/100,
                label = scales::percent(avg_seat_fill_perc/100,
            accuracy = 1),
            fontface = c('plain','plain','plain','bold','plain')), 
            position = position_dodge(width = 0.9), 
            hjust = 1) +
  coord_flip() + theme_classic() +
  theme(axis.line.y = element_blank(),
        axis.line.x = element_blank(),
        axis.title.y  = element_blank(),
        plot.title = element_text(color = "#808080",
                                  size = 16, 
                                  face = "bold", hjust = 2.5),
        plot.subtitle = element_text(color = "#808080", 
                                     size = 12, 
                                     face = "plain", hjust = -1),
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_text(color = "#808080", size = 11,
                                   face = c('plain','bold','plain','plain','plain')),
        legend.title = element_text(color = "#808080", size = 12, face = "plain"),
        legend.text = element_text(color = "#808080", size = 12, face = "plain"),
        plot.caption = element_text(color = "#808080", size = 10, face = "plain", margin = margin(t=10))
        ) +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  labs(title = 'Houston to Dallas flights are too empty',
       subtitle = 'Top 5 routes by passenger volume') +
  ylab('Percentage of Seats Filled')

five_busiest_plot

  
```

### Significant savings potential {data-width=50}

If Houston - Dallas had *average* rate of seats filled, it would have saved around **104,000 empty seats** in 2009

Overall market data row
-----------------------

### Overall passenger volume has decreased recently 

```{r}
pop_trend_data <- com_flights_df %>% 
  group_by(year) %>% 
  summarise(tot_passenger_vol = sum(passengers))

pop_trend_plot <- ggplot(pop_trend_data, mapping = aes(x = year, y = tot_passenger_vol/1e6)) +
  geom_line(color = 'grey') +
  geom_line(filter(pop_trend_data, year > 2006), 
            mapping = aes(x = year, y = tot_passenger_vol/1e6),
            size = 1.5, 
            color = '#750000')

pop_trend_plot <- pop_trend_plot + 
  geom_point(data = filter(pop_trend_data, year == 2009), 
             mapping = aes(x = year, y = tot_passenger_vol/1e6), 
             fill = '#344182', shape = 25, size = 3) +
  geom_text(aes(x = 2010, y = tot_passenger_vol[year == 2009]/1e6 - 10), label = '526.3M', color = '#344182' )

#get percent decrease over last few years
# (pop_trend_data$tot_passenger_vol[20] - pop_trend_data$tot_passenger_vol[18]) / 
#   pop_trend_data$tot_passenger_vol[18] * 100

pop_trend_plot + theme_classic() +
  theme(axis.line.y = element_blank(),
        axis.line.x = element_blank(),
        axis.title.x  = element_blank(),
        plot.title = element_text(color = "#808080",
                                  size = 16, face = "bold"),
        plot.subtitle = element_text(color = "#808080", 
                                     size = 12, face = "plain"),
        axis.text.x = element_text(color = "#808080", size = 11),
        axis.text.y = element_text(color = "#808080", size = 11),
        axis.title = element_text(color = "#808080", size = 11, face = "plain"),
        legend.title = element_text(color = "#808080", size = 12, face = "plain"),
        legend.text = element_text(color = "#808080", size = 12, face = "plain"),
        plot.caption = element_text(color = "#808080", size = 10, face = "plain", margin = margin(t=10))
        ) +
  scale_x_continuous(breaks = seq(1990,2010, by = 6)) +
  labs(title = 'Total passenger volume is falling',
       subtitle = expression(paste('Down ',
                                   italic(bold('9.1%')),
                                   ' from 2007 peak')),
       y = 'Millions of Passengers')


```

### Despite volume drop, operating efficiency is up 


```{r}
filled_trend_data <- com_flights_df %>% 
  group_by(year) %>% 
  summarise(avg_seat_fill_perc = mean(seat_fill_perc))

filled_trend_plot <- ggplot(filled_trend_data, 
                            mapping = aes(x = year,
                                          y = avg_seat_fill_perc/100)) +
  geom_line(color = 'grey') +
  geom_line(filter(filled_trend_data, year > 2006), 
            mapping = aes(x = year, y = avg_seat_fill_perc/100),
            size = 1.5, 
            color = '#344182')

filled_trend_plot <- filled_trend_plot + 
  geom_point(data = filter(filled_trend_data, year == 2009), 
             mapping = aes(x = year, y = avg_seat_fill_perc/100), 
             fill = '#344182', shape = 25, size = 3) +
  geom_text(aes(x = 2010, y = avg_seat_fill_perc[year == 2009]/100 - 0.01), label = '71%', color = '#344182' )


filled_trend_plot + theme_classic() +
  theme(axis.line.y = element_blank(),
        axis.line.x = element_blank(),
        axis.title  = element_blank(),
        plot.title = element_text(color = "#808080",
                                  size = 16, face = "bold"),
        plot.subtitle = element_text(color = "#808080", 
                                     size = 12, face = "plain"),
        axis.text.x = element_text(color = "#808080", size = 11),
        axis.text.y = element_text(color = "#808080", size = 11),
        legend.title = element_text(color = "#808080", size = 12, face = "plain"),
        legend.text = element_text(color = "#808080", size = 12, face = "plain"),
        plot.caption = element_text(color = "#808080", size = 10, face = "plain", margin = margin(t=10))
        ) +
  scale_x_continuous(breaks = seq(1990,2010, by = 6)) +
  scale_y_continuous(labels = percent_format()) +
  labs(title = 'Percentage of seats filled has gone up',
       subtitle = 'Up 1 percentage point from 2007 total passenger peak')
```